#!/usr/bin/env bash

# Show status of Slurm job(s).
# Both queue information and accounting information are printed.
# Homepage: https://github.com/OleHolmNielsen/Slurm_tools/

# Check number of arguments
if test $# -ne 1
then
	echo "Usage: $0 <jobid>"
	exit 1
fi
# Check that jobid is a number, optionally followed by _<array task id>.
# Digits are required on both sides of the underscore, so malformed values
# such as "_", "12_" or "1__2" are rejected here instead of being passed on
# to the Slurm commands.
jobid_regex='^[0-9]+(_[0-9]+)?$'
if ! [[ $1 =~ $jobid_regex ]]
then
	echo "ERROR: <jobid> must be a number or jobid_arraytaskid"
	echo "Usage: $0 <jobid>"
	exit 1
fi

# Unset any SLURM_TIME_FORMAT environment variable
# (the time fields printed by squeue may be modified by it)
unset SLURM_TIME_FORMAT

jobid=$1
# Scratch file used for the scontrol output.  mktemp picks an unpredictable
# name and creates the file safely; the trap removes it on every exit path.
TMPFILE=$(mktemp "${TMPDIR:-/tmp}/showjob.XXXXXX") || {
	echo "ERROR: Cannot create a temporary file" >&2
	exit 1
}
trap 'rm -f -- "$TMPFILE"' EXIT
# Check the showuserlimits command
showuserlimits=$(command -v showuserlimits)
if test -z "$showuserlimits"
then
	echo Did not find the showuserlimits command, please install it.
	# Fallback: only print the command line that would have been executed
	showuserlimits="echo showuserlimits"
fi

###########################################################################
# Execute sacct and print nicely formatted output
#
# Arguments: any sacct options, for example: -j <jobid> -o <fieldlist>
# Outputs:   a table on stdout consisting of the sacct header line, a line
#            of dashes and one line per sacct record.  Every column is
#            right-aligned and as wide as its widest value.

function format_sacct()
{
	# Add -P flag for parseable output with fields separated by "|".
	# "$@" is quoted so that every argument reaches sacct unchanged.
	sacct -P "$@" | awk -F "|" '
{
	# Remember every field.  The (column,line) subscript is a plain
	# multi-dimensional awk index, so this does not require gawk.
	for (i = 1; i <= NF; i++) {
		cell[i, NR] = $i
		len = length($i)
		if (len > colwidth[i]) colwidth[i] = len	# Column width
	}
	cols = NF
	lines = NR
} END {
	# Print a header (the first sacct line holds the field names)
	for (i = 1; i <= cols; i++)
		printf("%" colwidth[i] "s ", cell[i, 1])
	printf("\n")
	# Underline every column with exactly as many dashes as it is wide
	for (i = 1; i <= cols; i++) {
		dashes = ""
		for (j = 0; j < colwidth[i]; j++) dashes = dashes "-"
		printf("%s ", dashes)
	}
	printf("\n")
	# Print lines
	for (l = 2; l <= lines; l++) {
		for (i = 1; i <= cols; i++)
			printf("%" colwidth[i] "s ", cell[i, l])
		printf("\n")
	}
}'
}

# Get job accounting information
#
# Globals:   jobid (read); jobvars and jobuser (set and exported)
# Outputs:   job parameters, job details, the work directory and the
#            fairshare information of the job owner, all on stdout
# Exits the script with status 1 when sacct returns nothing for the
# batch step of the job.

function print_job_accounting()
{
	if test -z "$(sacct -nP -o JobID -j "${jobid}.batch")"
	then
		echo
		echo "ERROR: Invalid JobID: $jobid is not found in the Slurm database"
		exit 1
	fi
	echo
	echo "Accounting information from the Slurm database:"
	echo
	echo "Job parameters for jobid $jobid:"
	export jobvars="jobid,jobname,user,account,partition,Timelimit"
	format_sacct -j "$jobid" -o "$jobvars"
	echo
	echo "Job details information for jobid $jobid:"
	export jobvars="jobid,Submit,Eligible,Start,elapsed,End,CPUTime,NNodes,NCPUS,ExitCode,nodelist"
	format_sacct -j "$jobid" -o "$jobvars"
	echo
	echo "Workdir"
	echo "-------"
	sacct -j "$jobid" -nP -o WorkDir | head -n 1
	# Get user fairshare information.
	# sacct may print several lines here (some of them empty), so only the
	# first non-empty line is used as the user name.
	jobuser=$(sacct -nP -j "$jobid" -o user | awk 'NF { print $1; exit }')
	export jobuser
	echo
	echo "User fairshare information from the sshare command:"
	sshare -lUm -u "$jobuser"
}

###########################################################################
#
# Get Slurm queue information
#

# Check if job is in the queue and squeue displays job information
jobfound=$(squeue -hO JobID -j "$jobid" 2>/dev/null)
if [[ $? -ne 0 || -z "$jobfound" ]]
then
	echo
	echo "Job $jobid is not in the current Slurm queue"
	# Fall back to the accounting information for jobs not in the queue
	print_job_accounting
	exit 1
fi

# Check if this jobid can be inquired successfully.
JOBSTATE=$(squeue -h -O State -j "$jobid")
if test "$?" != "0"
then
	echo "Error inquiring about job $jobid"
	exit 1
fi

# Detect job arrays by counting number of words in JOBSTATE.
# $JOBSTATE is unquoted on purpose so that it is split into words.
# The count is compared numerically with (( )); inside [[ ]] the ">"
# operator would compare the two values as strings.
words=( $JOBSTATE )
if (( ${#words[@]} > 1 ))
then
	echo "ERROR: The job $jobid is a job array with multiple jobs. Please select only one of the array jobs:"
	squeue -j "$jobid"
	exit 1
fi

# 
# Print basic job information
#
# Get required job information from a single squeue command.
# The fields are read straight into shell variables, so the squeue output
# is never evaluated as shell code.
# Notes: SubmitTime may be modified by SLURM_TIME_FORMAT.
#        Dependency must be last: "read" stores the whole remainder of the
#        line in the last variable, so the field may be empty or may
#        contain characters that are special to the shell.
read -r jobuser jobaccount partition jobstate NCPUS timelimit reason submittime dependency \
	< <(squeue -hO "UserName,Account,Partition,State,NumCPUs,TimeLimit,Reason: ,SubmitTime,Dependency" -j "$jobid")

# Convert a Slurm time string to a number of minutes (seconds are dropped).
# Arguments: $1 - time in one of the Slurm formats
#                 days-hours[:minutes[:seconds]], hours:minutes:seconds,
#                 minutes:seconds or minutes
# Outputs:   the number of minutes on stdout; non-numeric values such as
#            UNLIMITED give 0
function timelimit_to_minutes()
{
	printf '%s\n' "$1" | awk -F- '
{
	days = 0; hours = 0; mins = 0
	if (NF > 1) {
		# With a days field the remainder starts with the hours
		days = $1
		split($2, a, ":")
		hours = a[1]
		mins = a[2]
	} else {
		n = split($1, a, ":")
		if (n >= 3) {
			# hours:minutes:seconds
			hours = a[1]
			mins = a[2]
		} else {
			# minutes:seconds or minutes only
			mins = a[1]
		}
	}
	print days*24*60 + hours*60 + mins	# Print number of minutes
}'
}

# Get time limit as a number of minutes
minutes=$(timelimit_to_minutes "$timelimit")
echo "Job $jobid was submitted by user $jobuser in account $jobaccount on $submittime"
echo "Job has state=$jobstate"
echo "Job requests $NCPUS CPUs and has a time limit of $timelimit (days-hh:mm:ss) = $minutes min."
echo "Job TRESRunMin: $((NCPUS*minutes))"
echo

# Display job reason code (if other than "None")

# Run showuserlimits for the user, account and partition of the job.
# Arguments: the showuserlimits options that select the limit to display
# Globals:   showuserlimits, jobuser, jobaccount, partition (read)
function show_limits()
{
	echo "Information about Account and User limits:"
	# $showuserlimits is unquoted on purpose: its value may be a command
	# followed by an argument and must be split into words
	$showuserlimits -u "$jobuser" -A "$jobaccount" -p "$partition" "$@"
}

# Print the details explaining the reason code of the job.
# Globals: reason, jobid, jobuser, NCPUS, minutes, dependency (read)
# Reason codes without a branch below print nothing.
function print_reason_details()
{
	local gres njobs
	case "$reason" in
	AssocGrpCPURunMinutesLimit)
		echo
		echo "Information about $reason:"
		echo "This job requires TRESRunMins:   cpu=$((NCPUS*minutes))"
		echo
		show_limits -l GrpTRESRunMins -s cpu
		;;
	AssocGrpCPUMinutesLimit)
		echo
		echo "Information about $reason:"
		echo "This job requires TRESMins:    cpu=$((NCPUS*minutes))"
		echo
		show_limits -l GrpTRESMins -s cpu
		;;
	AssocGrpCpuLimit)
		echo
		echo "Information about $reason:"
		printf '%s' "Current user TRES is:      "
		# Count number of CPUs for running jobs (0 when there are none)
		squeue -hO NumCPUs -u "$jobuser" -t running | awk '{ncpus+=$1}END{print "cpu=" (ncpus+0)}'
		# NCPUS of this job is already known, no second squeue call needed
		echo "This job requires TRES:    cpu=$NCPUS"
		echo
		show_limits -l GrpTRES -s cpu
		;;
	AssocGrpGRES)
		echo
		echo "Information about $reason:"
		# Use the first word only, dropping any blank padding of the field
		gres=$(squeue -hO tres-per-job -j "$jobid" | awk '{print $1; exit}')
		echo "This job requires GRES: $gres"
		echo
		# The GRES type is the part in front of the first colon
		show_limits -l GrpTRES -s "${gres%%:*}"
		;;
	AssocMaxJobsLimit)
		echo
		echo "Information about $reason:"
		njobs=$(squeue -hO JobID -u "$jobuser" -t running | wc -l)
		# $((...)) drops any blanks that wc may print around the number
		echo "User $jobuser number of running jobs is: $((njobs))"
		show_limits -l MaxJobs
		;;
	Dependency)
		echo
		echo "Dependency information: $dependency"
		;;
	esac
}

if test "$reason" != "None"
then
	echo "Job is in state $jobstate with reason=$reason"
	print_reason_details
	echo
fi

# Job found in the queue
echo "Queued job information:"

# Extract the job script path from "scontrol show job" output.
# Input:   scontrol output on stdin
# Outputs: the first word after "Command=" (arguments of the job script
#          are dropped), or nothing when there is no Command= line
function get_jobscript()
{
	awk '/Command=/ { sub(/^ *Command=/, ""); print $1 }'
}

# Keep the scontrol output in a variable, so no file in /tmp is needed.
# The appended "x" protects trailing newlines of the output from being
# stripped by the command substitution; it is removed again right away.
jobinfo=$(scontrol --details show job "$jobid"; echo x)
jobinfo=${jobinfo%x}
printf '%s' "$jobinfo"
# Parse the job status information (read E-mail)
jobscript=$(printf '%s' "$jobinfo" | get_jobscript)
export jobscript
# Check that jobscript is defined and that the file is readable
if [[ "$jobscript" != "(null)" && -s "$jobscript" ]]
then
	mailuser=$(grep -- "mail-user" "$jobscript")
	if [[ -n "$mailuser" ]]
	then
		echo Job script E-mail has been set:
		printf '%s\n' "$mailuser"
	fi
fi

# Print accounting information only when the job is in the Running state.
# The squeue documentation lists the job state names in upper case
# (RUNNING); the mixed-case spelling is accepted as well.
case "$jobstate" in
RUNNING|Running)
	print_job_accounting
	;;
esac
